Submission due on 5/31
# Load data
setwd("/Users/erin/Desktop/Spring2020/GIS3/labs")
load("census_2011_UK_OA.RData")
#subset to liverpool
Census_2011_Count <- merge(Liverpool,Census_2011_Count_All,by="OA",all.x=TRUE)
# calculate the numerators
head(OAC_Input_Lookup[,])
## VariableCode Type Denominator SubDomain Domain VariableDescription
## 1 k001 Count KS102EW0001 Population Age Demographic Age 0 to 4
## 2 k002 Count KS102EW0001 Population Age Demographic Age 5 to 14
## 3 k003 Count KS102EW0001 Population Age Demographic Age 25 to 44
## 4 k004 Count KS102EW0001 Population Age Demographic Age 45 to 64
## 5 k005 Count KS102EW0001 Population Age Demographic Age 65 to 89
## 6 k006 Count KS102EW0001 Population Age Demographic Age 90 and over
## England_Wales
## 1 KS102EW0002
## 2 KS102EW0003,KS102EW0004,KS102EW0005
## 3 KS102EW0010,KS102EW0011
## 4 KS102EW0012,KS102EW0013
## 5 KS102EW0014,KS102EW0015,KS102EW0016
## 6 KS102EW0017
OAC_Input <- as.data.frame(Census_2011_Count$OA)
colnames(OAC_Input) <- "OA"
# Loop through each row in the OAC input table
for (n in 1:nrow(OAC_Input_Lookup)){
# Get the variables to aggregate for the row specified by n
select_vars <- OAC_Input_Lookup[n,"England_Wales"]
# Create a list of the variables to select
select_vars <- unlist(strsplit(paste(select_vars),","))
# Create variable name
vname <- OAC_Input_Lookup[n,"VariableCode"]
# Creates a sum of the census variables for each Output Area
tmp <- data.frame(rowSums(Census_2011_Count[,select_vars, drop=FALSE]))
colnames(tmp) <- vname
# Append new variable to the OAC_Input object
OAC_Input <- cbind(OAC_Input,tmp)
# Remove temporary objects
remove(list = c("vname","tmp"))
} # END: Loop through each row in the OAC input table
#Remove attributes for SIR
OAC_Input$k035 <- NULL
# calculate the denominators
OAC_Input_den <- as.data.frame(Census_2011_Count$OA)
colnames(OAC_Input_den) <- "OA"
# Create a list of unique denominators
den_list <- unique(OAC_Input_Lookup[,"Denominator"])
den_list <- paste(den_list[den_list != ""])
# Select denominators
OAC_Input_den <- Census_2011_Count[,c("OA",den_list)]
#Merge
OAC_Input <- merge(OAC_Input,OAC_Input_den, by="OA")
# calculate percentages
# Get numerator denominator list where the Type is "Count" - i.e. not ratio
K_Var <- OAC_Input_Lookup[OAC_Input_Lookup$Type == "Count",c(1,3)]
# View top 6 rows
head(K_Var)
## VariableCode Denominator
## 1 k001 KS102EW0001
## 2 k002 KS102EW0001
## 3 k003 KS102EW0001
## 4 k004 KS102EW0001
## 5 k005 KS102EW0001
## 6 k006 KS102EW0001
# Create an OA list / data frame
OAC_Input_PCT_RATIO <- subset(OAC_Input, select = "OA")
# Loop
for (n in 1:nrow(K_Var)){
num <- paste(K_Var[n,"VariableCode"]) # Get numerator name
den <- paste(K_Var[n,"Denominator"]) # Get denominator name
tmp <- data.frame(OAC_Input[,num] / OAC_Input[,den] * 100) # Calculate percentages
colnames(tmp) <- num
OAC_Input_PCT_RATIO <- cbind(OAC_Input_PCT_RATIO,tmp) # Append the percentages
# Remove temporary objects
remove(list = c("tmp","num","den"))
}
#Extract Variable
tmp <- Census_2011_Count[,c("OA","KS101EW0008")]
colnames(tmp) <- c("OA","k007")
#Merge
OAC_Input_PCT_RATIO <- merge(OAC_Input_PCT_RATIO,tmp,by="OA")
# Calculate SIR for each subset of the Liverpool data
# Calculate rates of ill people 15 or less and greater than or equal to 65
ill_16_64 <- rowSums(Census_2011_Count[,c("KS301EW0005","KS301EW0006")]) # Ill people 16-64
ill_total <- rowSums(Census_2011_Count[,c("KS301EW0002","KS301EW0003")]) # All ill people
ill_L15_G65 <- ill_total - ill_16_64 # Ill people 15 or less and greater than or equal to 65
# Calculate total people 15 or less and greater than or equal to 65
t_pop_16_64 <- rowSums(Census_2011_Count[,c("KS102EW0007","KS102EW0008","KS102EW0009","KS102EW0010","KS102EW0011","KS102EW0012","KS102EW0013")]) # People 16-64
t_pop <- Census_2011_Count$KS101EW0001 # All people
t_pop_L15_G65 <- t_pop - t_pop_16_64 # All people 15 or less and greater than or equal to 65
# Calculate expected rate
ex_ill_16_64 <- t_pop_16_64 * (sum(ill_16_64)/sum(t_pop_16_64)) # Expected ill 16-64
ex_ill_L15_G65 <- t_pop_L15_G65 * (sum(ill_L15_G65)/sum(t_pop_L15_G65)) # Expected ill people 15 or less and greater than or equal to 65
ex_ill <- ex_ill_16_64 + ex_ill_L15_G65 # total expected ill people
# Ratio
SIR <- as.data.frame(ill_total / ex_ill * 100) # ratio between ill people and expected ill people
colnames(SIR) <- "k035"
# Merge data
OAC_Input_PCT_RATIO <- cbind(OAC_Input_PCT_RATIO,SIR)
# Remove unwanted objects
remove(list=c("SIR","ill_16_64","ill_total","ill_L15_G65","t_pop_16_64","t_pop","t_pop_L15_G65","ex_ill_16_64","ex_ill_L15_G65","ex_ill"))
# apply the procedures to the input data
# Calculate inverse hyperbolic sine
OAC_Input_PCT_RATIO_IHS <- log(OAC_Input_PCT_RATIO[,2:61]+sqrt(OAC_Input_PCT_RATIO[,2:61]^2+1))
# Calculate Range
range_01 <- function(x){(x-min(x))/(max(x)-min(x))} # range function
OAC_Input_PCT_RATIO_IHS_01 <- apply(OAC_Input_PCT_RATIO_IHS, 2, range_01) # apply range function to columns
# Add the OA codes back onto the data frame as row names
rownames(OAC_Input_PCT_RATIO_IHS_01) <- OAC_Input_PCT_RATIO$OA
library(ggplot2)
# Create a new empty numeric object to store the wss results
wss <- numeric()
# Run k means for 2-12 clusters and store the wss results
for (i in 2:12) wss[i] <- sum(kmeans(OAC_Input_PCT_RATIO_IHS_01, centers=i,nstart=20)$withinss)
# Create a data frame with the results, adding a further column for the cluster number
wss <- data.frame(2:12,wss[-1])
# Plot the results
names(wss) <- c("k","Twss")
ggplot(data=wss, aes(x= k, y=Twss)) + geom_path() + geom_point() + scale_x_continuous(breaks=2:12) + labs(y = "Total within sum of squares")
# moving forward with 7 clusters
# Load cluster object
setwd("/Users/erin/Desktop/Spring2020/GIS3/labs")
load("cluster_7.Rdata")
# Show object content
str(cluster_7)
## List of 9
## $ cluster : Named int [1:1584] 7 5 7 5 5 7 5 1 1 4 ...
## ..- attr(*, "names")= chr [1:1584] "E00032987" "E00032988" "E00032989" "E00032990" ...
## $ centers : num [1:7, 1:60] 0.553 0.584 0.677 0.666 0.391 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : chr [1:7] "1" "2" "3" "4" ...
## .. ..$ : chr [1:60] "k001" "k002" "k003" "k004" ...
## $ totss : num 2827
## $ withinss : num [1:7] 286 308 250 255 159 ...
## $ tot.withinss: num 1635
## $ betweenss : num 1192
## $ size : int [1:7] 259 340 279 334 109 73 190
## $ iter : int 6
## $ ifault : int 0
## - attr(*, "class")= chr "kmeans"
# Lookup Table
lookup <- data.frame(cluster_7$cluster)
# Add OA codes
lookup$OA <- rownames(lookup)
colnames(lookup) <- c("K_7","OA")
# Recode clusters as letter
lookup$SUPER <- LETTERS[lookup$K_7]
table(lookup$K_7)
##
## 1 2 3 4 5 6 7
## 259 340 279 334 109 73 190
# Load packages
library(rgdal)
## Loading required package: sp
## rgdal: version: 1.4-8, (SVN revision 845)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.4.2, released 2019/06/28
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.6/Resources/library/rgdal/gdal
## GDAL binary built with GEOS: FALSE
## Loaded PROJ.4 runtime: Rel. 5.2.0, September 15th, 2018, [PJ_VERSION: 520]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.6/Resources/library/rgdal/proj
## Linking to sp version: 1.3-2
library(tmap)
## Warning: package 'tmap' was built under R version 3.6.2
# Import OA boundaries
setwd("/Users/erin/Desktop/Spring2020/GIS3/labs")
liverpool_SP <- readOGR("Liverpool_OA_2011.geojson", layer="Liverpool_OA_2011")
## OGR data source with driver: GeoJSON
## Source: "/Users/erin/Desktop/Spring2020/GIS3/labs/Liverpool_OA_2011.geojson", layer: "Liverpool_OA_2011"
## with 1584 features
## It has 1 fields
# Merge lookup
liverpool_SP <- merge(liverpool_SP, lookup, by.x="oa_code",by.y="OA")
m <- tm_shape(liverpool_SP, projection=27700) +
tm_polygons(col="SUPER", border.col = "grey50", palette="Set3",border.alpha = .3, title="Cluster", showNA=FALSE) +
tm_layout(legend.position = c("left", "bottom"), frame = FALSE) +
tm_basemap(leaflet::providers$CartoDB.DarkMatter)
#Create leaflet plot
tmap_leaflet(m)
## Warning: The shape liverpool_SP is invalid. See sf::st_is_valid
## Warning: package 'sf' was built under R version 3.6.2
## Linking to GEOS 3.7.2, GDAL 2.4.2, PROJ 5.2.0
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.